{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/experimental/merge-namespaces/merge-namespaces.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/learn/experimental/merge-namespaces/merge-namespaces.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Merging Namespaces in a Pinecone Index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook queries vectors out of two namespaces `ns1` and `ns2` and upserts them to a new namespace named `merged`.\n",
    "\n",
    "Please note that this code is **experimental** and is not guaranteed by Pinecone to work. Test thoroughly before using in production."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install (quietly, upgrading if already present) the pinecone-notebooks helper\n",
    "# package and the Pinecone client with its gRPC extras.\n",
    "%pip install -qU pinecone-notebooks pinecone-client[grpc]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pinecone_notebooks.colab import Authenticate\n",
    "\n",
    "# Run the Colab authentication helper. The client-setup cell that follows reads the\n",
    "# resulting API key from the PINECONE_API_KEY environment variable.\n",
    "Authenticate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pinecone.grpc import PineconeGRPC as Pinecone\n",
    "from pinecone import ServerlessSpec\n",
    "import os\n",
    "\n",
    "# Name of the index whose namespaces will be merged\n",
    "index_name = \"namespace-test\" # replace with the correct index name\n",
    "\n",
    "# Read the generated API key from the PINECONE_API_KEY environment variable\n",
    "api_key = os.getenv('PINECONE_API_KEY')\n",
    "\n",
    "# Build the Pinecone client from that key\n",
    "pc = Pinecone(api_key=api_key)\n",
    "\n",
    "# Handle to the index, used by the cells below\n",
    "index = pc.Index(index_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "dimension = index.describe_index_stats()['dimension']\n",
    "\n",
    "# Function to fetch all vectors from a namespace\n",
    "def fetch_all_vectors(namespace, max_vectors=10000):\n",
    "    \"\"\"Return the vectors stored in `namespace` as a list of query matches.\n",
    "\n",
    "    The namespace's vector count is read from the index stats and passed as\n",
    "    `top_k` to a single query against a random vector, with values and\n",
    "    metadata included in the response.\n",
    "\n",
    "    Args:\n",
    "        namespace: Name of the namespace to read.\n",
    "        max_vectors: Largest namespace size handled by a single query.\n",
    "\n",
    "    Returns:\n",
    "        The `matches` of the query response. An empty list is returned (after\n",
    "        printing an error) when the namespace is missing from the index stats\n",
    "        or holds more than `max_vectors` vectors, and also when the namespace\n",
    "        is empty.\n",
    "    \"\"\"\n",
    "    stats = index.describe_index_stats()\n",
    "    try:\n",
    "        count = stats['namespaces'][namespace]['vector_count']\n",
    "    except KeyError:\n",
    "        # The namespace is not listed in the index stats\n",
    "        print(f\"Error: Namespace '{namespace}' was not found in the index.\")\n",
    "        return []\n",
    "    if count > max_vectors:\n",
    "        print(f\"Error: Namespaces larger than {max_vectors} vectors need to be handled iteratively.\")\n",
    "        return []\n",
    "    if count == 0:\n",
    "        # Nothing to fetch; this also avoids sending a query with top_k=0\n",
    "        return []\n",
    "    # NOTE(review): the service may cap top_k or the response size when values and\n",
    "    # metadata are requested - confirm against the current Pinecone limits.\n",
    "    random_vecs = [random.random() for _ in range(dimension)]\n",
    "    response = index.query(\n",
    "        namespace=namespace,\n",
    "        vector=random_vecs,\n",
    "        top_k=count,\n",
    "        include_values=True,\n",
    "        include_metadata=True\n",
    "    )\n",
    "    return response['matches']\n",
    "\n",
    "# Fetch vectors from ns1 and ns2\n",
    "vectors_ns1 = fetch_all_vectors(\"ns1\")\n",
    "vectors_ns2 = fetch_all_vectors(\"ns2\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert fetched vectors to the required upsert format\n",
    "def format_vectors_for_upsert(fetched_vectors):\n",
    "    \"\"\"Turn query matches into the list of dicts passed to `index.upsert`.\n",
    "\n",
    "    Args:\n",
    "        fetched_vectors: Iterable of matches, each subscriptable by 'id',\n",
    "            'values' and (optionally) 'metadata'.\n",
    "\n",
    "    Returns:\n",
    "        A list with one dict per match holding \"id\" and \"values\". The\n",
    "        \"metadata\" key is added only when the match has metadata, so vectors\n",
    "        stored without metadata are not upserted with `None`.\n",
    "    \"\"\"\n",
    "    records = []\n",
    "    for match in fetched_vectors:\n",
    "        record = {\"id\": match['id'], \"values\": match['values']}\n",
    "        try:\n",
    "            metadata = match['metadata']\n",
    "        except (KeyError, AttributeError):\n",
    "            # The match has no metadata field at all\n",
    "            metadata = None\n",
    "        if metadata is not None:\n",
    "            record[\"metadata\"] = metadata\n",
    "        records.append(record)\n",
    "    return records\n",
    "\n",
    "formatted_vectors_ns1 = format_vectors_for_upsert(vectors_ns1)\n",
    "formatted_vectors_ns2 = format_vectors_for_upsert(vectors_ns2)\n",
    "\n",
    "print(f\"Preparing to upsert {len(formatted_vectors_ns1)} vectors from ns1 and \\\n",
    "{len(formatted_vectors_ns2)} vectors from ns2\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that any vectors with overlapping IDs between `ns1` and `ns2` will be overwritten by the `ns2` upsert."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from itertools import islice\n",
    "\n",
    "# Split an iterable into batches (100 items each by default) for upserting\n",
    "def chunks(data, size=100):\n",
    "    \"\"\"Yield successive tuples of at most `size` items taken from `data`.\"\"\"\n",
    "    iterator = iter(data)\n",
    "    while True:\n",
    "        batch = tuple(islice(iterator, size))\n",
    "        if not batch:\n",
    "            # An empty tuple means the iterator is exhausted\n",
    "            return\n",
    "        yield batch\n",
    "\n",
    "# Namespace that receives the vectors from both sources\n",
    "target_namespace = 'merged'\n",
    "\n",
    "# ns1 is written first ...\n",
    "for batch in chunks(formatted_vectors_ns1):\n",
    "    index.upsert(vectors=batch, namespace=target_namespace)\n",
    "\n",
    "# ... then ns2, so on overlapping IDs the ns2 vector is the one written last\n",
    "for batch in chunks(formatted_vectors_ns2):\n",
    "    index.upsert(vectors=batch, namespace=target_namespace)\n",
    "\n",
    "print(f\"Upserted {len(formatted_vectors_ns1)} vectors from ns1 and \\\n",
    "{len(formatted_vectors_ns2)} vectors from ns2 into {target_namespace}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
